%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% APPENDIX: ESTIMATE THE EFFECT OF TRANSPORTATION COSTS ON LOCAL YIELDS IN THE AMAZON

% "DEFORESTATION IN THE AMAZON:
% A UNIFIED FRAMEWORK FOR ESTIMATION AND POLICY ANALYSIS"

% by Eduardo Souza-Rodrigues

% This version: November 2018

% OBSERVATIONS: 
% 1. Run this program after running "dem_def_reg.m"
% 2. The data set has to be loaded before running this program.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% (A) PREPARE THE DATA SET

% I. Organize Data
    if farm == 1
    
        % Load the Data Set - Small Farms
        data1 = [data_folder,'\land_use_matlab_small.txt'];
        load(data1);
        data = land_use_matlab_small;
        clear data1 land_use_matlab_small;
    
    elseif farm == 2

        % Load the Data Set - Small-Medium Farms
        data1 = [data_folder,'\land_use_matlab_small_medium.txt'];
        load(data1);
        data = land_use_matlab_small_medium;
        clear data1 land_use_matlab_small_medium;
        
    elseif farm == 3

        % Load the Data Set - Medium-Large Farms
        data1 = [data_folder,'\land_use_matlab_medium_large.txt'];
        load(data1);
        data = land_use_matlab_medium_large;
        clear data1 land_use_matlab_medium_large;     
        
    elseif farm == 4

        % Load the Data Set - Large Farms
        data1 = [data_folder,'\land_use_matlab_large.txt'];
        load(data1);
        data = land_use_matlab_large;
        clear data1 land_use_matlab_large;
        
    end
    
% Name of the variables
Code        = data(:,1);      % Municipality Identifier (IBGE's code)
Y           = data(:,2);      % Proportion of Deforestation
Qa1         = data(:,3);      % Productivity Index - Only Crops
Qa2         = data(:,4);      % Productivity Index - Crops and Pasture
Qrice       = data(:,5);      % Yields Rice
Qman        = data(:,6);      % Yields Manioc (Cassava)
Qcorn       = data(:,7);      % Yields Corn
Qsoy        = data(:,8);      % Yields Soy
Qbean       = data(:,9);      % Yields Beans
TC          = data(:,10);     % Transportation cost to the port
Alt         = data(:,11);     % Altitude
Temp        = data(:,12);     % Temperature
Rain        = data(:,13);     % Rain
Slope       = data(:,14);     % Average Slope
Soil        = data(:,15:18);  % Proportion of soil quality = [prop_potenc2 prop_potenc3 prop_potenc4 prop_potenc5]
Pop         = data(:,19);     % Population
Mining      = data(:,20);     % Presence of Mining
Powerplant  = data(:,21);     % Presence of Powerplant
PPNeighbor  = data(:,22);     % Indicator for Powerplant Neighbor
DistPA      = data(:,23);     % Straight Line Distance to nearest Protected Area
Title       = data(:,24);     % Proportion of the Private Area with Land Title
Fines2005   = data(:,25);     % Fines up to 2005
Fines2003   = data(:,26);     % Fines up to 2003
D           = data(:,27:28);  % Straight Line Distances = [dist to port, dist to state capital]
Ibama       = data(:,29);     % Distance to the Nearest Ibama Agency
CarbonDiff  = data(:,30);     % Difference between Forest Carbon Stock and Deforested Carbon Stock
Area        = data(:,31);     % Area occupied by farms of the size considered
G_IR        = data(:,32);     % Clusters, IBGE Immediate Regions
G50         = data(:,33);     % Clusters, G = 50
G75         = data(:,34);     % Clusters, G = 75
G100        = data(:,35);     % Clusters, G = 100
G125        = data(:,36);     % Clusters, G = 125
G150        = data(:,37);     % Clusters, G = 150
Wpop        = data(:,38);     % Spatially Lagged Population
Wmining     = data(:,39);     % Spatially Lagged Mining
Wpowerplant = data(:,40);     % Spatially Lagged Powerplants
WdistPA     = data(:,41);     % Spatially Lagged Proximity to Protected Areas

% Number of observations
T  = size(Y,1);    

% Truncate Y so that 0 < Y < 1
Yt = Y + (myzero - Y).*(Y<myzero) + (1 - myzero - Y).*(Y>1-myzero);

% Compute log odds ratio
lnY = log(Yt./(1-Yt));

% Spatially Lagged Regressors
WX = [Wpop, Wmining, Wpowerplant, WdistPA];

% Set of Covariates (Model Specification):
if model == 0
    
    % Main Specification
    X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Fines2005 Ibama WX];
        
elseif model == 1
    
    % No Population
    X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Title Fines2005 Ibama WX];
    
elseif model == 2
    
    % No land title proxy
    X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Fines2005 Ibama WX];
    
elseif model == 3
    
    % No distance to ibama, nor fines
    X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title WX];
    
elseif model == 4
    
    % No distance to ibama
    X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Fines2005 WX];
    
elseif model == 5
    
    % No Fines
    X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Ibama WX];
    
elseif model == 6
    
    % No Spatially Lagged Regressors
    X     = [Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Fines2005 Ibama];    
    
end

% Dimension of X        
dx = size(X,2);   
   
% All Regressors -- Including TC and the constant term
Xc  = [TC X ones(T,1)];
dxc = size(Xc,2);           

% Set of Instruments
if d_iv == 1
    D = D(:,1); % only dist_port as IV
end

% Instrumental Variables
Z  = [D X];
dz = size(Z,2);    % Dimension of Z
    
% All Instruments -- Including constant term
Zc  = [Z ones(T,1)];
dzc = size(Zc,2);   % Dimension of Zc

% Vectors That Are Not Needed Anymore (Exogenous Regressors)
clear Alt Temp Rain Slope Soil  Mining Powerplant Pop Title
clear G_IR G50 G75 G100 G125 G150    


%% (B) ESTIMATE YIELDS REGRESSION MODEL

% Productivity Index
Qa  = [Qa2, Qa1, Qsoy, Qcorn, Qrice, Qbean, Qman];

% Projection Matrix
Pz = Zc*((Zc'*Zc)\Zc');

% Projection on X
Xc_hat = Pz*Xc;

% Exclude outliers: large Q for manioc
index = find(Qman<10);

% Coefficients
qa_beta_iv = (Xc_hat(index,:)'*Xc_hat(index,:))\(Xc_hat(index,:)'*Qa(index,:));

% Residuals
Uqa = Qa(index,:)  - Xc(index,:) * qa_beta_iv;
%Uqa = lnQ - Xc * qa_beta_iv;

for i = 1:size(Qa,2)
        
    % Robust Variance Matrix
    qa_var = inv(Xc_hat(index,:)'*Xc_hat(index,:))*(Xc_hat(index,:)'*diag(Uqa(:,i).^2)*Xc_hat(index,:))*inv(Xc_hat(index,:)'*Xc_hat(index,:));
    
    % Standard Error
    qa_se(:,i) = sqrt(diag(qa_var));
    
    % t-statistic
    qa_t(:,i) = qa_beta_iv(:,i)./sqrt(diag(qa_var));
    
    % 95%-Confidence Interval
    qa_LB(:,i) = qa_beta_iv(:,i) - critical * sqrt(diag(qa_var));
    qa_UB(:,i) = qa_beta_iv(:,i) + critical * sqrt(diag(qa_var));
    
    % Overidentification Test
    Jqa(:,i)       = (Zc(index,:)'*Uqa(:,i))' * inv(Zc(index,:)'*diag(Uqa(:,i).^2)*Zc(index,:)) * (Zc(index,:)'*Uqa(:,i));
    pvalue_qa(:,i) = 1 - chi2cdf(Jqa(:,i),dzc-dxc); 
    
end


%% (B) SHOW ESTIMATED RESULTS

disp('----------------------------------------------------------------------------')
if farm == 1        
    disp('APPENDIX: YIELDS REGRESSIONS (2SLS), SMALL FARMS')       
elseif farm == 2
    disp('APPENDIX: YIELDS REGRESSIONS (2SLS), SMALL-MEDIUM FARMS')
elseif farm == 3
    disp('APPENDIX: YIELDS REGRESSIONS (2SLS), MEDIUM-LARGE FARMS')
elseif farm == 4
    disp('APPENDIX: YIELDS REGRESSIONS (2SLS), LARGE FARMS')
end

% Show the Coefficients, T-Statistics, and Confidence Intervals
table(qa_beta_iv(1,:)',qa_se(1,:)',qa_LB(1,:)',qa_UB(1,:)',...
    'VariableNames',{'Coefficient','se','LB_Conf_Interval_95','UB_Conf_Interval_95'},...
    'RowNames',{'Index - Crops-Pasture','Index - Only Crops','Soy','Corn','Rice','Beans','Manioc'})

disp([char('NUMBER OBS:'), blanks(numel(length(Qa(index))))', num2str(length(Qa(index)))])
disp([char('AVERAGE LOCAL YIELDS, CROPS-PASTURE:'), blanks(numel(mean(Qa(:,1))))', num2str(mean(Qa(:,1)))])
disp([char('AVERAGE LOCAL YIELDS, ONLY CROPS:'), blanks(numel(mean(Qa(:,2))))', num2str(mean(Qa(:,2)))])
disp('----------------------------------------------------------------------------')
pause

clear index


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


